#include "cell.h"
#include <cstdint>
#include <qthread.h>
#ifdef __sw_host__
// Slave-side entry point; its definition is not part of the host build and is
// resolved at link time.
extern void slave_gather_guest_x_cpe(cellgrid_t *grid);

/**
 * Host-side launcher for the guest-position gather kernel.
 *
 * Passes `grid` to slave_gather_guest_x_cpe through qthread_spawn and then
 * calls qthread_join before returning.
 * NOTE(review): presumably qthread_join blocks until the spawned slave work
 * has finished -- confirm against the qthread API.
 *
 * @param grid  Cell grid descriptor handed unchanged to the slave kernel.
 */
void gather_guest_x_sw(cellgrid_t *grid)
{
  qthread_spawn(slave_gather_guest_x_cpe, grid);
  qthread_join();
}
#endif
#ifdef __sw_slave__

#include <qthread_slave.h>
//#include "dmapp.hpp"
#include "memptr.hpp"
#include "swarch.h"
#include <stdio.h>
/**
 * Slave-side kernel: for every local cell visited by
 * FOREACH_LOCAL_CELL_CPE_RR, fill the cell's guest entries with positions
 * taken from its neighbor cells.
 *
 * For each neighbor displacement (dx, dy, dz) the index `did` selects the
 * half-open range [first_guest_cell[did], first_guest_cell[did + 1]) of guest
 * entries that come from that neighbor. For every entry `i` in the range:
 *   - guest_x[i] = position `guest_id[i]` of the neighbor cell (read through
 *                  xcache) shifted by the difference of the two cells' bases;
 *   - guest_f[i] = 0.
 *
 * @param g_grid  Pointer to the grid descriptor; a by-value copy is obtained
 *                through fetch_ptr and used for the rest of the kernel.
 */
void gather_guest_x_cpe(cellgrid_t *g_grid){
  cellgrid_t grid = fetch_ptr(g_grid);
  // Position cache built over grid.cells.
  // NOTE(review): meaning of the <64, 8> template arguments is defined in the
  // cache's own header -- not visible here.
  cell_x_cache<64, 8> xcache(grid.cells);
  // hdcell() at the displacement (nn, nn, nn), plus one; first_guest_cell is
  // read with nneighbor + 1 entries so that [did] / [did + 1] are both valid.
  int nneighbor = hdcell(grid.nn, grid.nn, grid.nn, grid.nn) + 1;
  FOREACH_LOCAL_CELL_CPE_RR(&grid, cx, cy, cz, icell) {
    cellmeta_t imeta = fetch_ptr((cellmeta_t*)&icell->basis);

    auto first_guest_cell = array_in(icell->first_guest_cell, nneighbor + 1);
    auto guest_id = array_in(icell->guest_id, imeta.nguest);
    auto guest_x = array_out<CELL_CAP>(icell->x, imeta.nguest);
    auto guest_f = array_out<CELL_CAP>(icell->f, imeta.nguest);
    FOREACH_NEIGHBOR(&grid, cx, cy, cz, dx, dy, dz, jcell){
      int did = hdcell(grid.nn, dx, dy, dz);
      // This neighbor contributes no guests: skip it before computing its
      // offset and fetching its metadata, neither of which would be used.
      if (first_guest_cell[did] == first_guest_cell[did + 1]) continue;

      int joff = get_offset_xyz<true>(grid, cx + dx, cy + dy, cz + dz);
      cellmeta_t jmeta = fetch_ptr((cellmeta_t*)&jcell->basis);
      // Shift that moves a position from the neighbor's basis to this cell's.
      vec<real> dcell = jmeta.basis - imeta.basis;
      for (int i = first_guest_cell[did]; i < first_guest_cell[did + 1]; i ++){
        // Single cache lookup per guest (the former unused temporary doubled it).
        guest_x[i] = xcache(joff, guest_id[i]) + dcell;
        guest_f[i] = 0;
      }
    }
  }
}
#endif
